#!/usr/bin/env python3
# A4 Measure Invariance (hinge single-read) — self-contained engine (stdlib only)
import argparse, csv, hashlib, json, math, os, sys, time
from pathlib import Path

# ---------- utils ----------
def ensure_dir(p: Path):
    """Create directory *p*, including missing parents; no error if it already exists."""
    p.mkdir(parents=True, exist_ok=True)
def sha256_of_file(p: Path):
    """Return the hex SHA-256 digest of the file at *p*.

    The file is consumed in 1 MiB pieces so it never has to fit in memory
    as a whole.
    """
    block_size = 1 << 20
    digest = hashlib.sha256()
    with p.open('rb') as stream:
        while True:
            piece = stream.read(block_size)
            if piece == b'':
                # empty read means end of file
                break
            digest.update(piece)
    return digest.hexdigest()
def sha256_of_text(s: str):
    """Return the hex SHA-256 digest of *s* encoded as UTF-8."""
    encoded = s.encode('utf-8')
    digest = hashlib.sha256(encoded)
    return digest.hexdigest()
def write_json(p: Path, obj):
    """Serialize *obj* as 2-space-indented JSON and write it to *p* (UTF-8).

    The parent directory of *p* is created first if it does not exist.
    """
    ensure_dir(p.parent)
    payload = json.dumps(obj, indent=2)
    p.write_text(payload, encoding='utf-8')
def write_csv(p: Path, header, rows):
    """Write *header* followed by every row in *rows* to a CSV file at *p*.

    The parent directory of *p* is created first if it does not exist.
    The file is opened with ``newline=''`` so the csv module controls line
    endings itself.
    """
    ensure_dir(p.parent)
    with p.open('w', newline='', encoding='utf-8') as handle:
        writer = csv.writer(handle)
        writer.writerow(header)
        writer.writerows(rows)

def load_json(p: Path, must_exist=True):
    """Parse the UTF-8 JSON file at *p* and return the decoded value.

    When the file is absent, raise ``FileNotFoundError`` if *must_exist* is
    true; otherwise return an empty dict.
    """
    if p.exists():
        text = p.read_text(encoding='utf-8')
        return json.loads(text)
    if not must_exist:
        return {}
    raise FileNotFoundError(f"Missing file: {p}")

# ---------- single-read cache wrappers ----------
class SingleReadFile:
    """Load a JSON file from disk at most once; later calls return the cached value.

    The bytes fetched by the one logical read are both parsed and hashed, so
    ``initial_hash`` always describes exactly the content held in the cache.
    """
    def __init__(self, path: Path, label: str):
        self.path = path            # on-disk location of the JSON file
        self.label = label          # caller-chosen tag (stored, not used by this class)
        self._cache = None          # parsed JSON value once loaded
        self._loaded = False        # explicit flag: a parsed value of None (JSON null) is valid
        self.read_count = 0         # number of logical reads performed
        self.initial_hash = None    # SHA-256 hex digest of the bytes that were parsed
        self.final_hash = None      # left for the caller to fill in

    def read_once(self):
        """Return the parsed JSON value, reading the file only on the first call.

        Raises:
            FileNotFoundError: if the file does not exist on the first call.
            UnicodeDecodeError / json.JSONDecodeError: if the content is not
                valid UTF-8 JSON.
        """
        if not self._loaded:
            if not self.path.exists():
                raise FileNotFoundError(f"Missing file: {self.path}")
            # One physical read: parse and hash the very same bytes so the
            # hash cannot describe a different version than the cache.
            raw = self.path.read_bytes()
            self._cache = json.loads(raw.decode('utf-8'))
            self.initial_hash = hashlib.sha256(raw).hexdigest()
            self.read_count += 1
            self._loaded = True
        return self._cache

    def snapshot_hash(self):
        """Return the current on-disk SHA-256 hex digest, or None if the file is gone.

        This re-hashes the file without touching ``read_count``.
        """
        if self.path.exists():
            return sha256_of_file(self.path)
        return None

# ---------- main ----------
def main():
    """Run the measure/hinge invariance audit and write its artifacts.

    Command-line arguments (all required): ``--manifest``, ``--measure``,
    ``--hinge``, ``--diag`` (paths to JSON files) and ``--out`` (output
    directory).

    The measure and hinge files are each loaded once through a
    ``SingleReadFile``. A configurable number of stages then consult them
    again through the same wrapper while the on-disk hashes are re-checked.
    The run PASSes when neither file was read more than ``max_reads`` times
    and neither file's on-disk hash differed from its initial hash at any
    stage or at the end.

    Artifacts written under ``--out``:
      * ``metrics/measure_hinge_stages.csv`` — one row per stage
      * ``audits/measure_invariance.json``   — audit summary incl. PASS
      * ``run_info/hashes.json``             — provenance hashes
    A one-line JSON summary is also printed to stdout.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument('--manifest', required=True)   # JSON
    ap.add_argument('--measure', required=True)    # JSON (phase measure, bins/epsilon)
    ap.add_argument('--hinge', required=True)      # JSON (hinge set metadata)
    ap.add_argument('--diag', required=True)       # JSON (tolerances & stages)
    ap.add_argument('--out', required=True)
    args = ap.parse_args()

    out_dir = Path(args.out)
    metrics_dir = out_dir/'metrics'
    audits_dir = out_dir/'audits'
    runinfo_dir = out_dir/'run_info'
    for d in (metrics_dir, audits_dir, runinfo_dir):
        ensure_dir(d)

    # Manifest is not part of the single-read audit. It is parsed only to
    # confirm it exists and is valid JSON, then hashed for provenance.
    manifest_path = Path(args.manifest)
    load_json(manifest_path, must_exist=True)
    manifest_hash = sha256_of_file(manifest_path)

    # Set up single-read wrappers
    measure_path = Path(args.measure)
    hinge_path   = Path(args.hinge)
    measure_file = SingleReadFile(measure_path, 'measure')
    hinge_file   = SingleReadFile(hinge_path, 'hinge')

    # Diagnostics config
    diag = load_json(Path(args.diag), must_exist=True)
    tol = diag.get('tolerances', {})
    tau_reads = int(tol.get('max_reads', 1))   # should be 1
    stages = int(diag.get('stages', 5))        # how many internal stages to simulate

    # ---- Stage 0: first (and only) logical read of measure + hinge ----
    measure_file.read_once()
    hinge_file.read_once()

    # Snapshot starting hashes
    meas_hash0 = measure_file.initial_hash
    hing_hash0 = hinge_file.initial_hash

    # ---- Stages 1..N: consult measure/hinge repeatedly ----
    # Every stage goes through read_once() rather than the private cache
    # attribute, so the read counters reflect the real access path and
    # would expose any extra disk read.
    stage_rows = []
    stage_hashes_ok = True
    for s in range(1, stages+1):
        cached_measure = measure_file.read_once()
        cached_hinge = hinge_file.read_once()

        # derive a lightweight "use" so this isn't a no-op:
        # build a short fingerprint from keys/values
        m_sig = sha256_of_text(json.dumps(cached_measure, sort_keys=True))[:12]
        h_sig = sha256_of_text(json.dumps(cached_hinge, sort_keys=True))[:12]

        # check on-disk hashes (must be identical to initial)
        meas_hash_now = measure_file.snapshot_hash()
        hing_hash_now = hinge_file.snapshot_hash()
        if meas_hash_now != meas_hash0 or hing_hash_now != hing_hash0:
            # Remember mid-run drift even if the file is restored later.
            stage_hashes_ok = False

        # record per-stage
        stage_rows.append([
            s, m_sig, h_sig, meas_hash_now, hing_hash_now,
            measure_file.read_count, hinge_file.read_count
        ])

    # Final hashes & counters
    measure_file.final_hash = measure_file.snapshot_hash()
    hinge_file.final_hash   = hinge_file.snapshot_hash()

    # Invariance checks
    read_ok = (measure_file.read_count <= tau_reads) and (hinge_file.read_count <= tau_reads)
    hash_ok = (
        stage_hashes_ok
        and (measure_file.final_hash == meas_hash0)
        and (hinge_file.final_hash == hing_hash0)
    )

    # PASS requires single logical read and unchanged hashes
    PASS = read_ok and hash_ok

    # ---- write artifacts ----
    # per-stage snapshots
    write_csv(
        metrics_dir/'measure_hinge_stages.csv',
        ['stage','measure_sig12','hinge_sig12','measure_hash','hinge_hash','measure_read_count','hinge_read_count'],
        stage_rows
    )

    # audit summary
    write_json(
        audits_dir/'measure_invariance.json',
        {
            "tolerances": {"max_reads": tau_reads},
            "measure": {
                "path": str(measure_path.as_posix()),
                "initial_hash": meas_hash0,
                "final_hash": measure_file.final_hash,
                "read_count": measure_file.read_count
            },
            "hinge": {
                "path": str(hinge_path.as_posix()),
                "initial_hash": hing_hash0,
                "final_hash": hinge_file.final_hash,
                "read_count": hinge_file.read_count
            },
            "hash_unchanged": hash_ok,
            "single_read_enforced": read_ok,
            "PASS": PASS
        }
    )

    # provenance
    write_json(
        runinfo_dir/'hashes.json',
        {
            "manifest_hash": manifest_hash,
            "measure_hash": meas_hash0,
            "hinge_hash": hing_hash0,
            "engine_entrypoint": f"python {Path(sys.argv[0]).name} --manifest <...> --measure <...> --hinge <...> --diag <...> --out <...>"
        }
    )

    # stdout summary
    summary = {
        "measure_reads": measure_file.read_count,
        "hinge_reads": hinge_file.read_count,
        "hash_unchanged": hash_ok,
        "PASS": PASS,
        "audit_path": str((audits_dir/'measure_invariance.json').as_posix())
    }
    print("A4 SUMMARY:", json.dumps(summary))

if __name__ == '__main__':
    try:
        main()
    except Exception as e:
        # Explicit failure with reason: best-effort write of a failing audit
        # record, then re-raise the original error so the process still fails.
        out_dir = None
        for i, a in enumerate(sys.argv):
            # argparse accepts both "--out DIR" and "--out=DIR"; the last
            # occurrence wins, matching argparse's own behavior.
            if a == '--out' and i+1 < len(sys.argv):
                out_dir = Path(sys.argv[i+1])
            elif a.startswith('--out='):
                out_dir = Path(a[len('--out='):])
        if out_dir:
            try:
                audits = out_dir/'audits'; ensure_dir(audits)
                write_json(audits/'measure_invariance.json',
                           {"PASS": False, "failure_reason": f"Unexpected error: {type(e).__name__}: {e}"})
            except Exception as audit_err:
                # Do not let a secondary failure displace the real error.
                print(f"Could not write failure audit: {type(audit_err).__name__}: {audit_err}",
                      file=sys.stderr)
        raise
